#
# Copyright (c) 2021, 2023 supercell
#
# SPDX-License-Identifier: BSD-3-Clause
#
require "html"
require "uri"

module Luce
  alias DelimiterTypes = Delimiter | SimpleDelimiter

  # Maintains the internal state needed to parse inline span elements
  # in Markdown.
  class InlineParser
    @@default_syntaxes = [
      EmailAutoLinkSyntax.new,
      AutolinkSyntax.new,
      LineBreakSyntax.new,
      # Parse "**strong**" and "*emphasis*" tags.
      EmphasisSyntax.asterisk,
      # Parse "__strong__" and "_emphasis_" tags.
      EmphasisSyntax.underscore,
      CodeSyntax.new,
      SoftLineBreakSyntax.new,
      # We will add the LinkSyntax once we know about the specific link
      # resolver.
    ]

    # The string of Markdown being parsed
    getter source : String

    # The Markdown document this parser is parsing
    getter document : Document

    getter syntaxes = [] of InlineSyntax

    # The current read position
    property pos : Int32 = 0

    # Starting position of the last unconsumed text.
    property start : Int32 = 0

    # The delimiter stack tracking possible opening delimiters and
    # closing delimiters for `DelimiterSyntax` nodes.
    @delimiter_stack : Array(Delimiter) = Array(Delimiter).new

    # The tree of parsed HTML nodes
    @tree = Array(Node).new

    def initialize(@source : String, @document : Document)
      # Specified syntaxes are the first syntaxes to be evaluated
      @syntaxes.concat(document.inline_syntaxes)

      # This first Regex matches plain text to accelerate parsing. It's
      # written so that it does not match any prefix of any following
      # syntaxes. Most Markdown is plain text, so it's faster to match
      # one Regex per 'word' rather than fail to match all the
      # following Regexs at each non-syntax character position.
      if document.has_custom_inline_syntaxes?
        # We should be less aggressive in blowing past "words".
        @syntaxes << TextSyntax.new(%q([A-Za-z0-9]+(?=\s)))
      else
        @syntaxes << TextSyntax.new(%q([ \tA-Za-z0-9]*[A-Za-z0-9](?=\s)))
      end

      if document.with_default_inline_syntaxes?
        # Custom link resolvers go after the generic text syntax.
        @syntaxes.concat([
          EscapeSyntax.new,
          DecodeHtmlSyntax.new,
          LinkSyntax.new(link_resolver: document.link_resolver),
          ImageSyntax.new(link_resolver: document.image_link_resolver),
        ])

        @syntaxes.concat @@default_syntaxes
      end

      if encode_html?
        @syntaxes << EscapeHTMLSyntax.new
        # Leave already-encoded HTML entities alone. Ensures we don't turn
        # "&amp;" into "&amp;amp;"
        @syntaxes << TextSyntax.new("&[#a-zA-Z0-9]*;", start_character: Charcode::AMPERSAND)
      end
    end

    def parse : Array(Node)
      until done?
        # A right backet (']') is special. Hitting this character
        # triggers the "look for link or image" procedure.
        # See https://spec.commonmark.org/0.30/#an-algorithm-for-parsing-nested-emphasis-and-links.
        if char_at(@pos) == Charcode::RBRACKET
          write_text()
          link_or_image()
          next
        end

        # See if the current text matches any defined Markdown syntax
        begin
          next if @syntaxes.any?(&.matches?(self))
        rescue
          # COMPAT FOR CRYSTAL < 1.8 (undefined constant Regex::Error)
          #
          # Regex Match Limit (see https://codeberg.org/supercell/luce/issues/5)
          # This mimics the behaviour of PCRE as used with Crystal < 1.8.
        end

        # If we got here, it's just text.
        advance 1
      end

      # Write any trailing text content to a Text node.
      write_text()
      process_delimiter_run(-1)
      combine_adjacent_text @tree
      @tree
    end

    # Look back through the delimiter stack to see if we've found a
    # link or image.
    #
    # This is the "look for link or image" routine from the CommonMark spec:
    # https://spec.commonmark.org/0.30/#look-for-link-or-image.
    private def link_or_image : Nil
      index = @delimiter_stack.rindex { |delim| delim.char == Charcode::LBRACKET || delim.char == Charcode::EXCLAMATION } || -1
      if index == -1
        # Never found a possible open bracket. This is just a literal
        # "]"
        add_node Text.new("]")
        advance 1
        @start = @pos
        return
      end

      delimiter = @delimiter_stack[index].as SimpleDelimiter
      if !delimiter.active?
        @delimiter_stack.delete_at(index)
        add_node(Text.new("]"))
        advance 1
        @start = @pos
        return
      end
      syntax = delimiter.syntax
      if syntax.is_a? LinkSyntax && @syntaxes.any?(LinkSyntax)
        node_index = @tree.rindex { |node| node == delimiter.node } || -1
        link_nodes = syntax.close(self, delimiter, nil, Proc(Array(Node)).new {
          process_delimiter_run(index)
          # All of the nodes which lie past `index` are children of
          # this link/image
          children = @tree[node_index + 1...@tree.size]
          @tree.delete_at(node_index + 1...@tree.size)
          children
        })
        if !link_nodes.nil?
          @delimiter_stack.delete_at(index)
          if delimiter.char == Charcode::LBRACKET
            @delimiter_stack[0...index].each do |delim|
              delim.active = false if delim.char == Charcode::LBRACKET
            end
          end
          @tree[node_index...@tree.size] = link_nodes
          advance 1
          @start = @pos
        else
          @delimiter_stack.delete_at(index)
          @pos = @start
          advance 1
        end
      else
        raise "Non-link syntax delimiter found with character '#{delimiter.char}'"
      end
    end

    # Rules 9 and 10.
    private def form_emphasis?(opener : Delimiter, closer : Delimiter) : Bool
      if (opener.openable? && opener.closable?) || (closer.openable? && closer.closable?)
        return (opener.size + closer.size) % 3 != 0 ||
          (opener.size % 3 == 0 && closer.size % 3 == 0)
      end
      true
    end

    # Process `DelimiterRun` type delimiters from *bottom_index* and up
    #
    # This is the same strategy as "process emphasis" routine according to the
    # CommonMark spec: https://spec.commonmark.org/0.30/#phase-2-inline-structure
    private def process_delimiter_run(bottom_index : Int32) : Nil
      current_index = bottom_index + 1
      # Track the lowest index where we might find an open delimiter
      # given a closing delimiter length modulo 3.
      # Each key in this hash is an open delimiter character. Each
      # value is a 3-element list. Each value in the list is the lowest
      # index for the given delimiter modulo 3 (0, 1, 2).
      openers_bottom = Hash(Int32, Array(Int32)).new
      while current_index < @delimiter_stack.size
        closer = @delimiter_stack[current_index]
        if !closer.closable? || !(closer.is_a? DelimiterRun)
          current_index += 1
          next
        end
        unless openers_bottom.has_key? closer.char
          openers_bottom[closer.char] = [bottom_index]*3
        end
        openers_bottom_per_closer_size = openers_bottom[closer.char]
        opener_bottom = openers_bottom_per_closer_size[closer.size % 3]
        opener_index = @delimiter_stack.rindex(offset: current_index - 1) { |delim|
          # WARNING: DON'T CHANGE THIS! To save you a few hours of "debugging"
          # (as well as you can in crystal with puts...), recall that negative
          # numbers are a valid index position for arrays, so if the `offset`
          # for this is, say, -2, then the loop will still run. In dart,
          # having a "start" (offset in crystal) position of a negative number
          # means that the loop won't run, and -1 will be returned.
          break if current_index - 1 < 0
          delim.char == closer.char && delim.openable? && form_emphasis?(delim, closer)
        } || -1
        if opener_index > bottom_index && opener_index > opener_bottom
          # Found an opener for `closer`.
          opener = @delimiter_stack[opener_index]
          unless opener.is_a? DelimiterRun
            current_index += 1
            next
          end
          matched_tag_index = opener.tags.rindex { |e|
            opener.size >= e.indicator_length && closer.size >= e.indicator_length
          } || -1
          if matched_tag_index == -1
            current_index += 1
            next
          end
          matched_tag = opener.tags[matched_tag_index]
          indicator_length = matched_tag.indicator_length
          opener_text_node = opener.node
          opener_text_node_index = @tree.index(opener_text_node) || -1
          closer_text_node = closer.node
          closer_text_node_index = @tree.index(closer_text_node) || -1
          nodes = opener.syntax.close(self, opener, closer, tag: matched_tag.tag,
            get_children: ->{
              @tree[opener_text_node_index + 1...closer_text_node_index]
            })
          # Replace all of the nodes betwee the opener and the closer
          # (which are now the new emphasis node's children) with the
          # emphasis node.
          # ameba:disable Lint/NotNil
          @tree[opener_text_node_index + 1...closer_text_node_index] = nodes.not_nil!
          # Slide `closer_text_node_index` back accordingly
          closer_text_node_index = opener_text_node_index + 2

          @delimiter_stack.delete_at(opener_index + 1...current_index)
          # Slide `current_index` back accordingly
          current_index = opener_index + 1

          # Remove delimiter characters, possibly removing nodes from
          # the tree and Delimiters from the delimiter stack.
          if opener.size == indicator_length
            @tree.delete_at(opener_text_node_index)
            @delimiter_stack.delete_at(opener_index)
            # Slide `current_index` and `closer_text_node_index` back
            # accordingly
            current_index -= 1
            closer_text_node_index -= 1
          else
            new_opener_text_node =
              Text.new(opener_text_node.text[indicator_length..].to_s)
            @tree[opener_text_node_index] = new_opener_text_node
            opener.node = new_opener_text_node
          end

          if closer.size == indicator_length
            @tree.delete_at(closer_text_node_index)
            @delimiter_stack.delete_at(current_index)
            # `current_index` has just moved to point at the next
            # delimiter; leave it.
          else
            new_closer_text_node =
              Text.new(closer_text_node.text[indicator_length..].to_s)
            @tree[closer_text_node_index] = new_closer_text_node
            closer.node = new_closer_text_node
            # `current_index` needs to be considered again; leave it.
          end
        else
          # No opener is found
          openers_bottom_per_closer_size[closer.size % 3] = current_index - 1
          if !closer.openable?
            @delimiter_stack.delete_at(current_index)
            # This advances `current_index` to the next delimiter
          else
            current_index += 1
          end
        end
      end

      @delimiter_stack.delete_at(bottom_index + 1...@delimiter_stack.size)
    end

    # Combine any remaining adjacent Text nodes.
    #
    # This is important to produce correct output across newlines,
    # where whitespace is sometimes compressed.
    private def combine_adjacent_text(nodes : Array(Node)) : Nil
      i = 0
      while i < nodes.size - 1
        node = nodes[i]
        if node.is_a? Element && !node.children.nil?
          # ameba:disable Lint/NotNil
          combine_adjacent_text(node.children.not_nil!)
          i += 1
          next
        end
        if node.is_a? Text && nodes[i + 1].is_a? Text
          buffer = String::Builder.new(node.text_content + nodes[i + 1].text_content)
          j = i + 2
          while j < nodes.size && nodes[j].is_a? Text
            buffer << nodes[j].text_content
            j += 1
          end
          nodes[i] = Text.new(buffer.to_s)
          nodes.delete_at(i + 1...j)
        end
        i += 1
      end
    end

    def char_at(index : Int32) : Int32
      @source.codepoint_at(index)
    end

    def write_text : Nil
      return if @pos == @start
      text = @source[@start...@pos]
      @tree << Text.new(text)
      @start = @pos
    end

    # Add *node* to the current tree.
    def add_node(node : Node) : Nil
      @tree << node
    end

    # Push *delimiter* onto the stack of `Delimiter`s
    def push_delimiter(delimiter : Delimiter) : Nil
      @delimiter_stack << delimiter
    end

    def done? : Bool
      @pos == @source.size
    end

    def advance(length : Int32) : Nil
      @pos += length
    end

    def consume(length : Int32) : Nil
      @pos += length
      @start = @pos
    end

    protected def encode_html? : Bool
      @document.encode_html?
    end
  end
end
